Investigating Right Whale Sighting data in Cape Cod Bay


In [1]:
import pandas as pd
import netCDF4
from mpl_toolkits.basemap import interp

In [2]:
# Load the sighting records; the DATE column becomes the (parsed) row index.
df = pd.read_csv(
    '/usgs/data2/notebook/right_whale.csv',
    index_col='DATE',
    parse_dates=True
)

In [3]:
# Preview the first ten rows to check the parsed columns and date index.
df.head(n=10)


Out[3]:
Species Code SPECIES TOTAL Latitude Longitude
DATE
1998-12-13 RIWH 1 42.05167 -70.15833
1998-12-13 RIWH 1 42.05167 -70.14833
1998-12-13 RIWH 1 41.86000 -70.26334
1998-12-13 RIWH 2 41.79167 -70.35333
1999-01-06 RIWH 1 41.83333 -70.15833
1999-01-06 RIWH 2 41.84167 -70.17834
1999-01-06 RIWH 2 41.85833 -70.17333
1999-01-06 RIWH 1 41.79167 -70.24167
1999-01-08 RIWH 1 41.77500 -70.21167
1999-01-08 RIWH 1 41.96833 -70.15166

In [6]:
# read digital terrain data for Gulf of Maine
bathy_url='http://geoport.whoi.edu/thredds/dodsC/bathy/gom03_v1_0'
nc = netCDF4.Dataset(bathy_url).variables
# NOTE(review): `box` is not referenced by any other cell in this notebook.
box = [-71.4,41,-70.2,42]
lon=nc['lon'][:]
lat=nc['lat'][:]
# Pad the sighting bounding box by two grid cells on every side.  Selecting
# only nodes strictly inside [min, max] of the sightings leaves the outermost
# sightings just outside the subset grid, where basemap's interp clamps them
# to the edge values instead of interpolating between neighbouring nodes.
# Assumes lon/lat are 1-D coordinate vectors with at least two nodes, as the
# [bj,bi] indexing below already requires.
pad_lon = 2.0*abs(float(lon[1]-lon[0]))
pad_lat = 2.0*abs(float(lat[1]-lat[0]))
bi=(lon>=df['Longitude'].min()-pad_lon)&(lon<=df['Longitude'].max()+pad_lon)
bj=(lat>=df['Latitude'].min()-pad_lat)&(lat<=df['Latitude'].max()+pad_lat)
# terrain subset, indexed [lat, lon]
z=nc['topo'][bj,bi]
lon=lon[bi]
lat=lat[bj]

In [7]:
# Sample the terrain grid (z on lon/lat) at every sighting position and keep
# the result as a new Depth column on the sightings table.
sighting_lon = df['Longitude']
sighting_lat = df['Latitude']
df['Depth'] = interp(z, lon, lat, sighting_lon, sighting_lat)

In [8]:
# Confirm the Depth column was added (first five rows).
df.head(n=5)


Out[8]:
Species Code SPECIES TOTAL Latitude Longitude Depth
DATE
1998-12-13 RIWH 1 42.05167 -70.15833 -6.740654
1998-12-13 RIWH 1 42.05167 -70.14833 -3.542641
1998-12-13 RIWH 1 41.86000 -70.26334 -28.271041
1998-12-13 RIWH 2 41.79167 -70.35333 -23.508510
1999-01-06 RIWH 1 41.83333 -70.15833 -6.958285

In [9]:
# Histogram of the interpolated depth at the sighting locations.
sighting_depths = df['Depth']
hist(sighting_depths);



In [10]:
# create dataframe of shallow sightings (Depth greater than -10.0)
shallow_mask = df['Depth'] > -10.0
# Integer row positions of the shallow sightings.  The date index contains
# repeated dates, so rows have to be picked by position rather than by label.
ishallow = shallow_mask.values.nonzero()[0]
# .iloc is explicitly positional; the older .ix guessed between label and
# position lookups and is no longer available in later pandas releases.
df_shallow = df.iloc[ishallow]
df_shallow.head(10)


Out[10]:
Species Code SPECIES TOTAL Latitude Longitude Depth
DATE
1998-12-13 RIWH 1 42.05167 -70.15833 -6.740654
1998-12-13 RIWH 1 42.05167 -70.14833 -3.542641
1999-01-06 RIWH 1 41.83333 -70.15833 -6.958285
1999-01-21 RIWH 1 41.83333 -70.16167 -7.666747
1999-01-21 RIWH 2 41.83333 -70.16167 -7.666747
1999-01-26 RIWH 1 41.75167 -70.23000 -9.675799
1999-02-01 RIWH 1 41.83167 -70.10500 -9.478573
1999-02-17 RIWH 1 41.83333 -70.16666 -8.686150
2007-05-13 RIWH 2 41.80426 -70.07568 -9.200651
2007-04-25 RIWH 1 42.02145 -70.19609 -1.540242

In [9]:
# Map every shallow sighting on top of the terrain grid.
figure(figsize=(12, 12))
# Hide cells with z > 0 (presumably land -- confirm sign convention).
# Later plotting cells reuse this masked `z`.
z = ma.masked_where(z > 0, z)
# Scale the axes by 1/cos(mean latitude) so distances look roughly isotropic.
lat_aspect = 1.0 / cos(mean(lat) * pi / 180.0)
subplot(111, aspect=lat_aspect)
pcolormesh(lon, lat, z, vmax=0)
# Black dots with a white cross drawn over them.
for marker in ('ko', 'w+'):
    plot(df_shallow.Longitude, df_shallow.Latitude, marker)
axis([-70.6, -70.0, 41.75, 42.06])
grid()



In [10]:
# Aggregate the shallow sightings over time (by year here; other rules work too).
# Disabled alternative: means over 6-month bins.
#df_shallow_stats = df_shallow.resample('6M', how='mean')
# Annual sums, with each year ending in August.
# NOTE(review): `how=` is the older resample spelling -- confirm against the
# installed pandas before upgrading.
annual_rule = 'A-AUG'
df_shallow_stats = df_shallow.resample(annual_rule, how='sum')

In [11]:
# Annual sums of the SPECIES TOTAL column for the shallow subset.
annual_counts = df_shallow_stats['SPECIES TOTAL']
annual_counts


Out[11]:
DATE
1998-08-31     9
1999-08-31     9
2000-08-31     4
2001-08-31     3
2002-08-31   NaN
2003-08-31   NaN
2004-08-31     8
2005-08-31     1
2006-08-31   NaN
2007-08-31     3
2008-08-31     6
2009-08-31     4
2010-08-31     5
2011-08-31    25
Freq: A-AUG, Name: SPECIES TOTAL, dtype: float64

In [12]:
# Bar chart of the annual sums, to eyeball any trend over the years.
yearly_counts = df_shallow_stats['SPECIES TOTAL']
yearly_counts.plot(kind='bar', figsize=(12, 5))


Out[12]:
<matplotlib.axes.AxesSubplot at 0x36ed590>

In [13]:
# Sum (not average) of the shallow sightings for each calendar month,
# pooled over all years.
month_groups = df_shallow.groupby(lambda stamp: stamp.month)
df_monthly_sum = month_groups.sum()

In [14]:
# Monthly sums of the SPECIES TOTAL column, indexed by month number.
monthly_counts = df_monthly_sum['SPECIES TOTAL']
monthly_counts


Out[14]:
1     15
2      6
3     27
4     20
5      7
12     2
Name: SPECIES TOTAL, dtype: int64

In [15]:
# Bucket the shallow sightings by the calendar month of their timestamp;
# the plotting and export cells iterate over these groups.
grouped = df_shallow.groupby(lambda stamp: stamp.month)

In [16]:
# Plot one panel per month of sightings, three panels per row.
# The row count follows the number of month groups (2x3 for the six months
# present in this dataset), so a seventh month no longer overflows the grid.
ncols = 3
nrows = max(1, -(-len(grouped) // ncols))   # ceiling division
iframe = 0
figure(figsize=(20, 5*nrows))               # (20,10) for two rows, as before
# Same latitude-dependent aspect for every panel, so compute it once.
lat_aspect = 1.0/cos(mean(lat)*pi/180.0)
for iframe, (month, group) in enumerate(grouped, 1):
    subplot(nrows, ncols, iframe, aspect=lat_aspect)
    pcolormesh(lon,lat,z,vmax=0)
    # black dots with a white cross drawn over them
    plot(group.Longitude,group.Latitude,'ko')
    plot(group.Longitude,group.Latitude,'w+')
    axis('tight')
    # real boolean instead of relying on the string 'on' being truthy
    grid(True)
    title('Month = %d' % month)



In [17]:
# Write each month's sightings to its own CSV, named after the month number.
for month, group in grouped:
    csv_path = '/usgs/data2/notebook/%s.csv' % month
    group.to_csv(csv_path)

In [18]:
# One full-size figure per month, all drawn on the same fixed map extent.
iframe = 0
lat_aspect = 1.0/cos(mean(lat)*pi/180.0)
map_extent = [-70.6, -70.0, 41.75, 42.06]
for iframe, (month, group) in enumerate(grouped, 1):
    # figures are numbered 1..N in group order
    figure(iframe, figsize=(12, 12))
    subplot(111, aspect=lat_aspect)

    pcolormesh(lon, lat, z, vmax=0)
    # black dots with a white cross drawn over them
    for marker in ('ko', 'w+'):
        plot(group.Longitude, group.Latitude, marker)
    axis(map_extent)
    grid('on')
    title('Month = %d' % month)



In [19]:
# Grand total over the annual sums (cross-check against the monthly total).
annual_counts = df_shallow_stats['SPECIES TOTAL']
annual_counts.sum()


Out[19]:
77.0

In [20]:
# Grand total over the monthly sums; should match the annual grand total.
monthly_counts = df_monthly_sum['SPECIES TOTAL']
monthly_counts.sum()


Out[20]:
77

In [11]:



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-11-e17f9a5649fe> in <module>()
----> 1 df_shallow.month

/home/local/python27_epd/lib/python2.7/site-packages/pandas/core/frame.py in __getattr__(self, name)
   2005             return self[name]
   2006         raise AttributeError("'%s' object has no attribute '%s'" %
-> 2007                              (type(self).__name__, name))
   2008 
   2009     def __setattr__(self, name, value):

AttributeError: 'DataFrame' object has no attribute 'month'

In [12]:
# Inspect the row index of the shallow subset.
shallow_index = df_shallow.index
shallow_index


Out[12]:
<class 'pandas.tseries.index.DatetimeIndex'>
[1998-12-13 00:00:00, ..., 2001-03-17 00:00:00]
Length: 45, Freq: None, Timezone: None

In [13]:
# Check which index class the shallow subset carries.
shallow_index = df_shallow.index
type(shallow_index)


Out[13]:
pandas.tseries.index.DatetimeIndex

In [ ]: